Meteorology basic analyses for OUP climate chapter.

This script takes a first, basic graphical look at the stations and variables in order to explore some of the questions to be addressed in the climate chapter. It does not use any specialized time series R packages.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Directory (relative path) holding the gap-filled meteorology CSV files.
# File names are appended with paste0(), so the trailing slash must stay.
path_to_files <- "../data/processed_data/"

yearly data

# Yearly summaries: read the CSV, then convert the station id (parsed as a
# number) into a factor.
y <- mutate(
  read_csv(paste0(path_to_files, "met_yearly_gap_filled.csv")),
  sta = as.factor(sta)
)
## Rows: 110 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (11): sta, year, airt, maxair, minair, ppt, rh, sol_total, sol_mean, min...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column-by-column overview of the yearly data (quartiles and NA counts).
y %>% summary()
##  sta          year           airt           maxair          minair       
##  40:34   Min.   :1988   Min.   :11.91   Min.   :32.01   Min.   :-40.000  
##  42:33   1st Qu.:2001   1st Qu.:13.15   1st Qu.:37.13   1st Qu.:-16.793  
##  49:23   Median :2008   Median :13.82   Median :38.70   Median :-14.870  
##  50:20   Mean   :2007   Mean   :13.88   Mean   :38.51   Mean   :-15.885  
##          3rd Qu.:2015   3rd Qu.:14.67   3rd Qu.:39.84   3rd Qu.:-13.225  
##          Max.   :2021   Max.   :16.43   Max.   :48.72   Max.   : -3.797  
##                                                                          
##       ppt              rh          sol_total         sol_mean     
##  Min.   :122.0   Min.   :30.93   Min.   :357740   Min.   : 491.4  
##  1st Qu.:220.9   1st Qu.:38.23   1st Qu.:746134   1st Qu.:1020.9  
##  Median :247.9   Median :40.89   Median :767194   Median :1050.1  
##  Mean   :262.5   Mean   :41.02   Mean   :768173   Mean   :1050.8  
##  3rd Qu.:301.7   3rd Qu.:43.78   3rd Qu.:797182   3rd Qu.:1089.7  
##  Max.   :599.4   Max.   :50.04   Max.   :855454   Max.   :1169.4  
##                                  NA's   :23       NA's   :23      
##      minsol            maxsol     
##  Min.   :-4.8580   Min.   :444.8  
##  1st Qu.:-0.2505   1st Qu.:578.3  
##  Median :-0.1913   Median :584.3  
##  Mean   :-0.5667   Mean   :583.2  
##  3rd Qu.:-0.1484   3rd Qu.:612.5  
##  Max.   : 0.0000   Max.   :682.3  
##  NA's   :23        NA's   :23

airt

# airt: yearly mean air temperature
# Per-station time series with a linear trend line.
y %>%
  ggplot(aes(x = year, y = airt, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Yearly Mean Air Temperature (C)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

# Between-station comparison: one box per station.
ggplot(y, aes(x = sta, y = airt, color = sta)) +
  geom_boxplot() +
  theme(legend.position = "none") +
  labs(
    title = "Mean Annual Air Temperature by Station",
    x = "Station",
    y = "Air Temp (C) "
  )

# Per-station histograms of the yearly values.
ggplot(y) +
  geom_histogram(aes(x = airt, fill = sta), bins = 30) +
  facet_wrap(~ sta) +
  labs(
    title = "Distribution of Mean Annual Air Temperature",
    x = "Air Temp (C)",
    y = "Count"
  ) +
  theme(legend.position = "none")

minair

# minair: yearly minimum air temperature
# Per-station time series with a linear trend line.
ggplot(y, aes(x = year, y = minair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  ggtitle("Yearly Minimum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# Between-station comparison: one box per station.
# (Title typo "Minimul" corrected to "Minimum".)
y %>%
  ggplot(aes(x = sta, y = minair, color = sta)) +
  geom_boxplot() +
  labs(title = "Minimum Annual Air Temperature by Station",
       x = "Station",
       y = "Minimum Air Temp (C) ") +
  theme(legend.position = "none")

# Per-station histograms of the yearly minima.
y %>%
  ggplot() +
  geom_histogram(aes(x = minair, fill = sta), bins = 30) +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  labs(title = "Distribution of Minimum Annual Air Temperature",
       x = "Air Temp (C)",
       y = "Count")

maxair

# maxair: yearly maximum air temperature
# Per-station time series with a linear trend line.
y %>%
  ggplot(aes(x = year, y = maxair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Yearly Maximum Air Temperature (C)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

# Between-station comparison: one box per station.
ggplot(y, aes(x = sta, y = maxair, color = sta)) +
  geom_boxplot() +
  theme(legend.position = "none") +
  labs(
    title = "Maximum Annual Air Temperature by Station",
    x = "Station",
    y = "Maximum Air Temp (C) "
  )

# Per-station histograms of the yearly maxima.
ggplot(y) +
  geom_histogram(aes(x = maxair, fill = sta), bins = 30) +
  facet_wrap(~ sta) +
  labs(
    title = "Distribution of Maximum Annual Air Temperature",
    x = "Maximum Air Temp (C)",
    y = "Count"
  ) +
  theme(legend.position = "none")

rh

# rh: yearly mean relative humidity
# Per-station time series; the linear trend is drawn in light blue rather
# than in the station colour.
y %>%
  ggplot(aes(x = year, y = rh, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm", color = "lightblue") +
  facet_wrap(~ sta) +
  labs(title = "Annual Mean Relative Humidity") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

# Between-station comparison: one box per station.
ggplot(y, aes(x = sta, y = rh, color = sta)) +
  geom_boxplot() +
  theme(legend.position = "none") +
  labs(
    title = "Mean Annual Relative Humidity by Station",
    x = "Station",
    y = "Relative Humidity (%) "
  )

# Per-station histograms of the yearly values.
ggplot(y) +
  geom_histogram(aes(x = rh, fill = sta), bins = 30) +
  facet_wrap(~ sta) +
  labs(
    title = "Distribution of Mean Annual Relative Humidity",
    x = "Relative Humidity (%)",
    y = "Count"
  ) +
  theme(legend.position = "none")

ppt

# ppt: yearly total precipitation
# Per-station time series with a linear trend line.
y %>%
  ggplot(aes(x = year, y = ppt, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Yearly Total Precipitation (mm)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

# Between-station comparison: one box per station.
ggplot(y, aes(x = sta, y = ppt, color = sta)) +
  geom_boxplot() +
  theme(legend.position = "none") +
  labs(
    title = "Total Annual Precipitation by Station",
    x = "Station",
    y = "Precipitation (mm)"
  )

# Per-station histograms of the yearly totals.
ggplot(y) +
  geom_histogram(aes(x = ppt, fill = sta), bins = 30) +
  facet_wrap(~ sta) +
  labs(
    title = "Distribution of Total Annual Precipitation",
    x = "Precipitation (mm)",
    y = "Count"
  ) +
  theme(legend.position = "none")

observe temp and ppt variables by station

# Plot the yearly variables airt through ppt for a single station.
#
# data:    data frame with columns sta, year and the contiguous column
#          range airt..ppt
# station: station id; rows are kept where `sta == station`
#
# Returns a ggplot with one facet per variable (independent y scales),
# each showing the yearly series plus a linear fit.
y_met_vars_by_sta <- function(data, station) {
  one_station <- filter(data, sta == station)

  # Long format: one row per (year, variable) so variables can be faceted.
  long_vars <- one_station %>%
    select(sta:ppt) %>%
    pivot_longer(airt:ppt)

  ggplot(long_vars, aes(x = year, y = value, color = name)) +
    geom_line() +
    geom_smooth(method = "lm") +
    facet_wrap(~ name, scales = "free_y") +
    labs(title = paste0("Station ", station)) +
    theme(legend.position = "none")
}


# One multi-variable yearly figure per station.
y_met_vars_by_sta(data = y, station = "40")
## `geom_smooth()` using formula 'y ~ x'

y_met_vars_by_sta(data = y, station = "42")
## `geom_smooth()` using formula 'y ~ x'

y_met_vars_by_sta(data = y, station = "49")
## `geom_smooth()` using formula 'y ~ x'

y_met_vars_by_sta(data = y, station = "50")
## `geom_smooth()` using formula 'y ~ x'


monthly data

# Monthly summaries: read the CSV, then convert the station id (parsed as a
# number) into a factor.
m <- mutate(
  read_csv(paste0(path_to_files, "met_monthly_gap_filled.csv")),
  sta = as.factor(sta)
)
## Rows: 1320 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (12): sta, month, year, airt, maxair, minair, ppt, rh, sol_total, sol_m...
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column-by-column overview of the monthly data (quartiles and NA counts).
m %>% summary()
##  sta          month            year           date                 airt       
##  40:408   Min.   : 1.00   Min.   :1988   Min.   :1988-01-01   Min.   :-1.320  
##  42:396   1st Qu.: 3.75   1st Qu.:2001   1st Qu.:2001-02-22   1st Qu.: 6.362  
##  49:276   Median : 6.50   Median :2008   Median :2008-03-16   Median :13.718  
##  50:240   Mean   : 6.50   Mean   :2007   Mean   :2007-07-13   Mean   :13.876  
##           3rd Qu.: 9.25   3rd Qu.:2015   3rd Qu.:2015-02-01   3rd Qu.:21.882  
##           Max.   :12.00   Max.   :2021   Max.   :2021-12-01   Max.   :29.569  
##                                                                               
##      maxair           minair             ppt                rh       
##  Min.   : 8.985   Min.   :-40.000   Min.   :  0.000   Min.   :13.25  
##  1st Qu.:21.503   1st Qu.: -9.607   1st Qu.:  4.104   1st Qu.:31.30  
##  Median :28.745   Median : -2.836   Median : 13.524   Median :41.00  
##  Mean   :28.075   Mean   : -1.516   Mean   : 21.875   Mean   :41.02  
##  3rd Qu.:34.656   3rd Qu.:  7.375   3rd Qu.: 31.050   3rd Qu.:49.88  
##  Max.   :48.720   Max.   : 17.910   Max.   :183.147   Max.   :82.87  
##                                                                      
##    sol_total         sol_mean          minsol             maxsol     
##  Min.   : 14114   Min.   : 18.97   Min.   :-4.85800   Min.   :123.3  
##  1st Qu.: 43801   1st Qu.: 61.47   1st Qu.:-0.23964   1st Qu.:406.0  
##  Median : 65743   Median : 89.56   Median :-0.18088   Median :491.3  
##  Mean   : 64014   Mean   : 87.57   Mean   :-0.23463   Mean   :477.7  
##  3rd Qu.: 81518   3rd Qu.:110.50   3rd Qu.:-0.07166   3rd Qu.:558.1  
##  Max.   :105335   Max.   :144.02   Max.   : 0.00000   Max.   :682.3  
##  NA's   :276      NA's   :276      NA's   :276        NA's   :276

airt

# Monthly mean air temperature: per-station time series with linear trend.
ggplot(m, aes(x = date, y = airt, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  ggtitle("Monthly Mean Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# All months pooled: one box per station.
m %>%
  ggplot(aes(x = sta, y = airt, color = sta)) +
  geom_boxplot() +
  labs(title = "Mean Monthly Air Temperature by Station \n(months combined)",
       x = "Station",
       y = "Air Temp (C) ") +
  theme(legend.position = "none")

# One panel per calendar month, comparing stations within that month.
m %>%
  ggplot(aes(x = sta, y = airt, color = sta)) +
  geom_boxplot() +
  labs(title = "Mean Monthly Air Temperature by month",
       x = "Station",
       y = "Air Temp (C) ") +
  facet_wrap(~ month(date)) +
  theme(legend.position = "none")

# One panel per station, one box per calendar month. The x axis shows the
# month of year, so it is labelled "Month" rather than "Station". No
# group_by() is needed here: ggplot2 does not use dplyr grouping.
m %>%
  ggplot(aes(x = as.factor(month(date)), y = airt, color = sta)) +
  geom_boxplot() +
  labs(title = "Mean Monthly Air Temperature \nby Station and Month of Year",
       x = "Month",
       y = "Air Temp (C) ") +
  theme(legend.position = "none") +
  facet_wrap(~ sta)

# Per-station histograms of the monthly values.
m %>%
  ggplot() +
  geom_histogram(aes(x = airt, fill = sta), bins = 75) +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  labs(title = "Distribution of Mean Monthly Air Temperature",
       x = "Air Temp (C)",
       y = "Count")

minair

# Monthly minimum air temperature: per-station time series with linear trend.
ggplot(m, aes(x = date, y = minair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  ggtitle("Monthly Minimum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# All months pooled: one box per station.
m %>%
  ggplot(aes(x = sta, y = minair, color = sta)) +
  geom_boxplot() +
  labs(title = "Minimum Monthly Air Temperature by Station \n(months combined)",
       x = "Station",
       y = "Minimum Air Temp (C) ") +
  theme(legend.position = "none")

# One panel per calendar month, comparing stations within that month.
m %>%
  ggplot(aes(x = sta, y = minair, color = sta)) +
  geom_boxplot() +
  labs(title = "Minimum Monthly Air Temperature by month",
       x = "Station",
       y = "Minimum Air Temp (C) ") +
  facet_wrap(~ month(date)) +
  theme(legend.position = "none")

# One panel per station, one box per calendar month. The x axis shows the
# month of year, so it is labelled "Month" rather than "Station". No
# group_by() is needed here: ggplot2 does not use dplyr grouping.
m %>%
  ggplot(aes(x = as.factor(month(date)), y = minair, color = sta)) +
  geom_boxplot() +
  labs(title = "Minimum Monthly Air Temperature \nby Station and Month of Year",
       x = "Month",
       y = "Minimum Air Temp (C) ") +
  theme(legend.position = "none") +
  facet_wrap(~ sta)

# Per-station histograms of the monthly minima.
m %>%
  ggplot() +
  geom_histogram(aes(x = minair, fill = sta), bins = 75) +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  labs(title = "Distribution of Minimum Monthly Air Temperature",
       x = "Air Temp (C)",
       y = "Count")

maxair

# Monthly maximum air temperature: per-station time series with linear trend.
ggplot(m, aes(x = date, y = maxair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  ggtitle("Monthly Maximum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# All months pooled: one box per station.
m %>%
  ggplot(aes(x = sta, y = maxair, color = sta)) +
  geom_boxplot() +
  labs(title = "Maximum Monthly Air Temperature by Station \n(months combined)",
       x = "Station",
       y = "Maximum Air Temp (C) ") +
  theme(legend.position = "none")

# One panel per calendar month, comparing stations within that month.
m %>%
  ggplot(aes(x = sta, y = maxair, color = sta)) +
  geom_boxplot() +
  labs(title = "Maximum Monthly Air Temperature by month",
       x = "Station",
       y = "Maximum Air Temp (C) ") +
  facet_wrap(~ month(date)) +
  theme(legend.position = "none")

# One panel per station, one box per calendar month. The x axis shows the
# month of year, so it is labelled "Month" rather than "Station". No
# group_by() is needed here: ggplot2 does not use dplyr grouping.
m %>%
  ggplot(aes(x = as.factor(month(date)), y = maxair, color = sta)) +
  geom_boxplot() +
  labs(title = "Maximum Monthly Air Temperature \nby Station and Month of Year",
       x = "Month",
       y = "Maximum Air Temp (C) ") +
  theme(legend.position = "none") +
  facet_wrap(~ sta)

# Per-station histograms of the monthly maxima.
m %>%
  ggplot() +
  geom_histogram(aes(x = maxair, fill = sta), bins = 75) +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  labs(title = "Distribution of Maximum Monthly Air Temperature",
       x = "Air Temp (C)",
       y = "Count")

rh

# Monthly mean relative humidity: per-station time series with linear trend.
ggplot(m, aes(x = date, y = rh, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  ggtitle("Mean Relative Humidity")
## `geom_smooth()` using formula 'y ~ x'

# All months pooled: one box per station.
m %>%
  ggplot(aes(x = sta, y = rh, color = sta)) +
  geom_boxplot() +
  labs(title = "Mean Relative Humidity by Station \n(months combined)",
       x = "Station",
       y = "Relative Humidity (%)") +
  theme(legend.position = "none")

# One panel per calendar month, comparing stations within that month.
m %>%
  ggplot(aes(x = sta, y = rh, color = sta)) +
  geom_boxplot() +
  labs(title = "Monthly Mean Relative Humidity by month",
       x = "Station",
       y = "Relative Humidity (%)") +
  facet_wrap(~ month(date)) +
  theme(legend.position = "none")

# One panel per station, one box per calendar month. The x axis shows the
# month of year, so it is labelled "Month" rather than "Station". No
# group_by() is needed here: ggplot2 does not use dplyr grouping.
m %>%
  ggplot(aes(x = as.factor(month(date)), y = rh, color = sta)) +
  geom_boxplot() +
  labs(title = "Monthly Mean Relative Humidity \nby Station and Month of Year",
       x = "Month",
       y = "Relative Humidity (%)") +
  theme(legend.position = "none") +
  facet_wrap(~ sta)

# Per-station histograms of the monthly values. The x label now has its
# closing parenthesis.
m %>%
  ggplot() +
  geom_histogram(aes(x = rh, fill = sta), bins = 75) +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  labs(title = "Distribution of Monthly Mean Relative Humidity",
       x = "Relative Humidity (%)",
       y = "Count")

ppt

# Monthly total precipitation: per-station time series with linear trend.
ggplot(m, aes(x = date, y = ppt, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  ggtitle("Monthly Total Precipitation (mm)")
## `geom_smooth()` using formula 'y ~ x'

# All months pooled: one box per station.
m %>%
  ggplot(aes(x = sta, y = ppt, color = sta)) +
  geom_boxplot() +
  labs(title = "Monthly Total Precipitation by Station \n(months combined)",
       x = "Station",
       y = "Precipitation (mm)") +
  theme(legend.position = "none")

# One panel per calendar month, comparing stations within that month.
m %>%
  ggplot(aes(x = sta, y = ppt, color = sta)) +
  geom_boxplot() +
  labs(title = "Monthly Total Precipitation by month",
       x = "Station",
       y = "Precipitation (mm)") +
  facet_wrap(~ month(date)) +
  theme(legend.position = "none")

# One panel per station, one box per calendar month. The x axis shows the
# month of year, so it is labelled "Month" rather than "Station". No
# group_by() is needed here: ggplot2 does not use dplyr grouping.
m %>%
  ggplot(aes(x = as.factor(month(date)), y = ppt, color = sta)) +
  geom_boxplot() +
  labs(title = "Monthly Total Precipitation \nby Station and Month of Year",
       x = "Month",
       y = "Precip (mm)") +
  theme(legend.position = "none") +
  facet_wrap(~ sta)

# Per-station histograms of the monthly totals.
m %>%
  ggplot() +
  geom_histogram(aes(x = ppt, fill = sta), bins = 75) +
  facet_wrap(~ sta) +
  theme(legend.position = "none") +
  labs(title = "Distribution of Monthly Total Precipitation",
       x = "Precipitation (mm)",
       y = "Count")

observe temp and ppt variables by station

# Plot the monthly variables airt through ppt for a single station.
#
# data:    data frame with columns sta, date and the contiguous column
#          range airt..ppt (all columns from sta to ppt are selected)
# station: station id; rows are kept where `sta == station`
#
# Returns a ggplot with one facet per variable (independent y scales),
# each showing the series against date plus a linear fit.
m_met_vars_by_sta <- function(data, station) {
  one_station <- filter(data, sta == station)

  # Long format: one row per (date, variable) so variables can be faceted.
  long_vars <- one_station %>%
    select(sta:ppt) %>%
    pivot_longer(airt:ppt)

  ggplot(long_vars, aes(x = date, y = value, color = name)) +
    geom_line() +
    geom_smooth(method = "lm") +
    facet_wrap(~ name, scales = "free_y") +
    labs(title = paste0("Station ", station, " - Monthly")) +
    theme(legend.position = "none")
}

# One multi-variable monthly figure per station.
m_met_vars_by_sta(data = m, station = "40")
## `geom_smooth()` using formula 'y ~ x'

m_met_vars_by_sta(data = m, station = "42")
## `geom_smooth()` using formula 'y ~ x'

m_met_vars_by_sta(data = m, station = "49")
## `geom_smooth()` using formula 'y ~ x'

m_met_vars_by_sta(data = m, station = "50")
## `geom_smooth()` using formula 'y ~ x'

by month of year all stations

# Plot one variable against year with a panel per calendar month and a
# coloured line per station.
#
# data:  data frame with columns year, month and sta
# var:   unquoted column name to put on the y axis
# title: plot title (string)
#
# Returns a ggplot; each month panel has its own y scale.
by_month_all_sta <- function(data, var, title) {
  data %>%
    ggplot(aes(x = year, y = {{ var }}, color = sta)) +
    geom_line() +
    facet_wrap(~ month, scales = "free_y") +
    labs(title = title)
}

# Month-of-year panels, all stations overlaid, for each variable.
by_month_all_sta(data = m, var = airt,
                 title = "Monthly Mean Air Temperature (C)")

by_month_all_sta(data = m, var = maxair,
                 title = "Monthly Maximum Air Temperature (C)")

by_month_all_sta(data = m, var = minair,
                 title = "Monthly Minimum Air Temperature (C)")

by_month_all_sta(data = m, var = ppt,
                 title = "Monthly Total Precipitation (mm)")

by month of year for individual stations and their variables, with a linear model (lm) fit

# Plot one variable against year for a single station, with a panel per
# calendar month and a linear fit in each panel.
#
# data:    data frame with columns sta, year and month
# var:     unquoted column name to put on the y axis
# station: station id; rows are kept where `sta == station`
# title:   plot title (string)
#
# Returns a ggplot; each month panel has its own y scale.
by_month_lm <- function(data, var, station, title) {
  # Use the `data` argument. The earlier body read the global `m`, so the
  # argument was silently ignored.
  data %>%
    filter(sta == station) %>%
    ggplot(aes(x = year, y = {{ var }})) +
    geom_line() +
    geom_smooth(method = "lm") +
    facet_wrap(~ month, scales = "free_y") +
    ggtitle(title)
}

# Month-of-year panels with linear fits, one figure per variable and station.

# Mean air temperature
by_month_lm(data = m, var = airt, station = "40",
            title = "Station 40 Monthly Mean Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = airt, station = "42",
            title = "Station 42 Monthly Mean Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = airt, station = "49",
            title = "Station 49 Monthly Mean Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = airt, station = "50",
            title = "Station 50 Monthly Mean Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# Minimum air temperature
by_month_lm(data = m, var = minair, station = "40",
            title = "Station 40 Monthly Minimum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = minair, station = "42",
            title = "Station 42 Monthly Minimum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = minair, station = "49",
            title = "Station 49 Monthly Minimum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = minair, station = "50",
            title = "Station 50 Monthly Minimum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# Maximum air temperature
by_month_lm(data = m, var = maxair, station = "40",
            title = "Station 40 Monthly Maximum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = maxair, station = "42",
            title = "Station 42 Monthly Maximum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = maxair, station = "49",
            title = "Station 49 Monthly Maximum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = maxair, station = "50",
            title = "Station 50 Monthly Maximum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# Relative humidity
by_month_lm(data = m, var = rh, station = "40",
            title = "Station 40 Monthly Mean Relative Humidity (%)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = rh, station = "42",
            title = "Station 42 Monthly Mean Relative Humidity (%)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = rh, station = "49",
            title = "Station 49 Monthly Mean Relative Humidity (%)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = rh, station = "50",
            title = "Station 50 Monthly Mean Relative Humidity (%)")
## `geom_smooth()` using formula 'y ~ x'

# Precipitation
by_month_lm(data = m, var = ppt, station = "40",
            title = "Station 40 Monthly Precipitation (mm)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = ppt, station = "42",
            title = "Station 42 Monthly Precipitation (mm)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = ppt, station = "49",
            title = "Station 49 Monthly Precipitation (mm)")
## `geom_smooth()` using formula 'y ~ x'

by_month_lm(data = m, var = ppt, station = "50",
            title = "Station 50 Monthly Precipitation (mm)")
## `geom_smooth()` using formula 'y ~ x'


daily data

# Daily records: read the CSV, then convert the station id (parsed as a
# number) into a factor.
d <- mutate(
  read_csv(paste0(path_to_files, "met_daily_gap_filled.csv")),
  sta = as.factor(sta)
)
## Rows: 40178 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (20): sta, airt, maxair, minair, ppt, rh, sol_total, sol_mean, minsol, ...
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column-by-column overview of the daily data (quartiles and NA counts).
d %>% summary()
##  sta             date                 airt             maxair      
##  40:12419   Min.   :1988-01-01   Min.   :-21.680   Min.   :-18.47  
##  42:12053   1st Qu.:2001-03-02   1st Qu.:  6.367   1st Qu.: 14.37  
##  49: 8401   Median :2008-04-01   Median : 14.186   Median : 22.75  
##  50: 7305   Mean   :2007-07-28   Mean   : 13.914   Mean   : 21.95  
##             3rd Qu.:2015-02-15   3rd Qu.: 21.950   3rd Qu.: 30.12  
##             Max.   :2021-12-31   Max.   : 33.070   Max.   : 48.72  
##                                                                    
##      minair             ppt                rh            sol_total   
##  Min.   :-40.000   Min.   : 0.0000   Min.   :  2.804   Min.   :   0  
##  1st Qu.: -1.397   1st Qu.: 0.0000   1st Qu.: 25.782   1st Qu.:1467  
##  Median :  5.854   Median : 0.0000   Median : 38.866   Median :2118  
##  Mean   :  5.873   Mean   : 0.7187   Mean   : 41.042   Mean   :2103  
##  3rd Qu.: 13.940   3rd Qu.: 0.0000   3rd Qu.: 53.912   3rd Qu.:2729  
##  Max.   : 26.840   Max.   :69.1940   Max.   :106.271   Max.   :3869  
##                                                        NA's   :8401  
##     sol_mean          minsol           maxsol      no_record_flag   
##  Min.   :  0.00   Min.   :-4.858   Min.   :  0.0   Min.   : 0.0000  
##  1st Qu.: 61.12   1st Qu.:-0.187   1st Qu.:294.8   1st Qu.: 0.0000  
##  Median : 88.23   Median :-0.091   Median :378.4   Median : 0.0000  
##  Mean   : 87.63   Mean   :-0.118   Mean   :368.3   Mean   : 0.3215  
##  3rd Qu.:113.71   3rd Qu.:-0.041   3rd Qu.:432.1   3rd Qu.: 0.0000  
##  Max.   :161.22   Max.   : 0.359   Max.   :682.3   Max.   :24.0000  
##  NA's   :8401     NA's   :8401     NA's   :8401                     
##    airt_miss         maxair_miss      minair_miss        ppt_miss       
##  Min.   : 0.00000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.00000  
##  1st Qu.: 0.00000   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.00000  
##  Median : 0.00000   Median : 0.000   Median : 0.000   Median : 0.00000  
##  Mean   : 0.07507   Mean   : 1.096   Mean   : 1.092   Mean   : 0.09139  
##  3rd Qu.: 0.00000   3rd Qu.: 0.000   3rd Qu.: 0.000   3rd Qu.: 0.00000  
##  Max.   :24.00000   Max.   :24.000   Max.   :24.000   Max.   :24.00000  
##                                                                         
##     rh_miss           sol_miss       minsol_miss      maxsol_miss    
##  Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 0.0000   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median : 0.0000   Median : 0.000   Median : 0.000   Median : 0.000  
##  Mean   : 0.2326   Mean   : 6.726   Mean   : 7.159   Mean   : 7.111  
##  3rd Qu.: 0.0000   3rd Qu.:24.000   3rd Qu.:24.000   3rd Qu.:24.000  
##  Max.   :24.0000   Max.   :24.000   Max.   :24.000   Max.   :24.000  
##                                                                      
##  number_hrly_records
##  Min.   :24         
##  1st Qu.:24         
##  Median :24         
##  Mean   :24         
##  3rd Qu.:24         
##  Max.   :24         
## 

airt

# Daily mean air temperature per station with a linear trend line.
d %>%
  ggplot(aes(x = date, y = airt, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Daily Mean Air Temperature (C)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

minair

# Daily minimum air temperature per station with a linear trend line.
d %>%
  ggplot(aes(x = date, y = minair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Daily Minimum Air Temperature (C)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

maxair

# Daily maximum air temperature per station with a linear trend line.
d %>%
  ggplot(aes(x = date, y = maxair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Daily Maximum Air Temperature (C)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

rh

# Daily mean relative humidity per station; the linear trend is drawn in
# black rather than in the station colour.
d %>%
  ggplot(aes(x = date, y = rh, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm", color = "black") +
  facet_wrap(~ sta) +
  labs(title = "Daily Mean Relative Humidity (%)") +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

ppt

# Daily precipitation per station, drawn as small points (no trend line).
d %>%
  ggplot(aes(x = date, y = ppt, color = sta)) +
  geom_point(size = 0.2) +
  facet_wrap(~ sta) +
  labs(title = "Daily Total Precipitation (mm)") +
  theme(legend.position = "none")


look at data during the monsoon period

Petrie et al. (2014) defined the monsoon period as DOY 181-273.

# Keep only daily records whose day of year falls in the monsoon window
# (DOY 181-273, inclusive).
# NOTE(review): yday() counts from 1 January, so in leap years this window
# starts and ends one calendar day earlier than in other years - confirm
# that this matches the intended definition.
d_monsoon <- d %>%
  mutate(DOY = yday(date)) %>%
  filter(DOY >= 181 & DOY <= 273)

# Collapse the monsoon-window days to one row per station and year so they
# can be plotted as yearly series: mean of airt, min of minair, max of
# maxair and sum of ppt.
# `.groups = "drop"` returns an ungrouped tibble, so no leftover `sta`
# grouping leaks into later steps and no grouping message is emitted.
y_monsoon <- d_monsoon %>%
  mutate(year = year(date)) %>%
  group_by(sta, year) %>%
  summarize(airt = mean(airt),
            minair = min(minair),
            maxair = max(maxair),
            ppt = sum(ppt),
            .groups = "drop")
# Monsoon-season yearly series per station, each with a linear trend line.

# Mean air temperature
ggplot(y_monsoon, aes(x = year, y = airt, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Monsoon Mean Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# Minimum air temperature
ggplot(y_monsoon, aes(x = year, y = minair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Monsoon Minimum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# Maximum air temperature
ggplot(y_monsoon, aes(x = year, y = maxair, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Monsoon Maximum Air Temperature (C)")
## `geom_smooth()` using formula 'y ~ x'

# Total precipitation
ggplot(y_monsoon, aes(x = year, y = ppt, color = sta)) +
  geom_line() +
  geom_smooth(method = "lm") +
  facet_wrap(~ sta) +
  labs(title = "Monsoon Total Precipitation (mm)")
## `geom_smooth()` using formula 'y ~ x'